* selection_endog
*
* Purpose: predict dep() from a regression that includes an inverse Mills
* ratio taken from a probit of wageseen(), then compare the distribution of
* that prediction across the levels of refvar() with Kolmogorov-Smirnov
* statistics. Results are posted as e(b).
*
* Steps, in order:
*   1. probit of wageseen() on iv() and varlist; inverse Mills ratio from xb.
*   2. OLS of dep() on varlist and the Mills ratio, restricted to observations
*      with refvar() equal to homegroup().
*   3. The Mills ratio is set to 0 and the fitted value is written to gen()
*      for every if/in observation with non-missing refvar().
*   4. ksmirnov on gen() for every ordered pair of distinct refvar() levels.
*   5. When cdfoff() is 0: empirical and normal-kernel-smoothed CDFs of gen()
*      per level on a grid of grid() points between xmin() and xmax(), saved
*      to output_<filename> in the directory named by the global macro output.
*   6. ks_manual_DiD (defined outside this program) for every ordered pair,
*      against the base-run variables <stubnew>_group, _gen and _sel.
*   7. A row vector of selected statistics is posted with ereturn post.
*
* Options:
*   gen()         name of the prediction variable (dropped first if present)
*   refvar()      numeric grouping variable
*   xmin() xmax() range of the CDF evaluation grid
*   titleline()   parsed for interface compatibility; not used in this program
*   filename()    suffix of the saved CDF dataset
*   iv()          extra regressors that enter the probit only
*   wageseen()    probit outcome
*   dep()         outcome of the second-step regression
*   bootstrap()   0: base run, <stubnew>_* are built from the data;
*                 1: <stubnew>_* are loaded from the Mata vectors base_group,
*                    base_gen and base_sel
*   stubnew()     stub of the base-run variables
*   homegroup()   refvar() value used for the second-step regression
*   grid()        number of grid points (default 50)
*   cdfoff()      nonzero skips the CDF computation and export
*   subpredd() subpredvar()  when subpredd() is 1, gen() is set to missing
*                 wherever subpredvar() differs from 1
*   smoothoff()   nonzero skips the smoothed CDF
*   sleep()       milliseconds to pause before returning
*
* Side effects visible to the caller: the global matrices ksmirnov_pval,
* ksmirnov_stat, ksmirnov_stat1s, ksmirnov_pval1s, ksmirnov_stat1srev,
* ksmirnov_pval1srev, KS_DiDmax, KS_DiDabs and KS_DiDmaxrev are (re)defined.
*
* NOTE(review): the e(b) entries read fixed cells [1,2], [1,3] and [2,3] of
* those matrices, so the program presumably expects exactly three refvar()
* levels ordered equal, home, unequal. Confirm against the callers.
program define selection_endog, eclass 
	version 12.1
	syntax varlist(min=1) [if] [in], GEN(name) REFvar(varname) XMIN(real) XMAX(real) TITLEline(string) FILEname(string) IV(varlist) WAGESEEN(varname) DEP(varname) BOOTSTRAP(integer) STUBNEW(string) [HOMEgroup(integer 0) GRID(integer 50) CDFOFF(integer 0) SUBPREDD(integer 0) SUBPREDVAR(varname)  SMOOTHOFF(integer 0) SLEEP(integer 0)]

	* All scratch variables are temporary so that they cannot collide with
	* user variables and are removed even when a command below fails.
	tempvar touse tempx tempy tempy_smooth xbhat mills pairflag cdfind cdfsm esamp

	capture drop `gen'

	local N_iterations=50

	*****************************
	* SAMPLE MARKER
	*****************************

	* The if/in qualifiers are evaluated once into a 0/1 marker. Combining the
	* marker with further conditions avoids the precedence problem of pasting
	* "& condition" after a user expression that contains the | operator.
	mark `touse' `if' `in'

	*****************************
	* FIRST STEP: PROBIT AND INVERSE MILLS RATIO
	*****************************

	probit `wageseen' `iv' `varlist' if `touse', iterate(`N_iterations')
	local N_probit=e(N)

	predict `xbhat' if e(sample)==1, xb
	qui gen `mills'=normalden(`xbhat')/normal(`xbhat')

	*****************************
	* SECOND STEP: REGRESSION ON THE HOME GROUP
	*****************************

	reg `dep' `varlist' `mills' if `touse' & `refvar'==`homegroup'

	local N_wagereg=e(N)
	local R2_wagereg=e(r2)

	*****************************
	* PREDICT
	*****************************

	* With the Mills ratio at 0 everywhere, the fitted value excludes the
	* Mills-ratio term and is defined outside the probit sample as well.
	replace `mills'=0

	qui predict `gen' if `touse' & `refvar'!=.

	*************************
	* NOW RESTRICT -gen-, IF SUB_PRED_D==1
	*************************

	if `subpredd'==1 {
		qui replace `gen'=. if `subpredvar'!=1 & `gen'!=.
	}

	qui sum `gen'
	local N_pred=r(N)

	*****************************
	* PAIRWISE KS TESTS
	*****************************

	qui levelsof `refvar', local(levels)
	local sizeJ : word count `levels'

	matrix define ksmirnov_pval=J(`sizeJ',`sizeJ',.)
	matrix define ksmirnov_stat=J(`sizeJ',`sizeJ',.)
	matrix define ksmirnov_stat1s=J(`sizeJ',`sizeJ',.)
	matrix define ksmirnov_pval1s=J(`sizeJ',`sizeJ',.)
	matrix define ksmirnov_stat1srev=J(`sizeJ',`sizeJ',.)
	matrix define ksmirnov_pval1srev=J(`sizeJ',`sizeJ',.)

	* Cell [row,col] holds the comparison in which the level at position col
	* is coded 1 and the level at position row is coded 0 in by().
	local col=1
	foreach ll1 of local levels {
		local row=1
		foreach ll2 of local levels {

			if `ll1'!=`ll2' {
				qui gen `pairflag'=1 if `refvar'==`ll1'
				qui replace `pairflag'=0 if `refvar'==`ll2'

				* the exact option of ksmirnov is not requested
				qui ksmirnov `gen' if `refvar'==`ll1' | `refvar'==`ll2', by(`pairflag')
				local pval=r(p_cor)
				local stat=r(D)
				local stat1s=r(D_1)
				local pval1s=r(p_1)
				local stat1srev=r(D_2)
				local pval1srev=r(p_2)
				matrix ksmirnov_pval[`row',`col'] = `pval'
				matrix ksmirnov_stat[`row',`col'] = `stat'
				matrix ksmirnov_stat1s[`row',`col'] = `stat1s'
				matrix ksmirnov_pval1s[`row',`col'] = `pval1s'
				matrix ksmirnov_stat1srev[`row',`col'] = `stat1srev'
				matrix ksmirnov_pval1srev[`row',`col'] = `pval1srev'
				drop `pairflag'
			}

			local row=`row'+1
		}
		local col=`col'+1
	}

	if `cdfoff'==0 {

		*****************************
		* COMPUTE CDF
		*****************************

		* The grid is stored in the first grid() observations, so the data
		* must have at least that many rows.
		local nobs=_N
		if `nobs'<`grid' {
			di as error "selection_endog: grid(`grid') exceeds the number of observations (`nobs')"
			exit 198
		}

		local h_factor=0.6

		qui gen double `tempx'=`xmin' + (_n/`grid')*(`xmax'-`xmin') in 1/`grid'
		label var `tempx' "predicted home wage according to covariates"

		foreach ll1 of local levels {

			qui gen double `tempy'=.
			qui gen double `tempy_smooth'=.

			*********************************
			* CHOOSE BANDWIDTH
			*********************************

			* bandwidth: h_factor * 1.06 * sd * N^(-1/5) within the group
			qui sum `gen' if `gen'!=. &  `refvar'==`ll1'
			local sd=r(sd)
			local N=r(N)
			local h_cdf=`h_factor'*1.06*`sd'*(`N'^(-1/5))

			*********************************
			* END BANDWIDTH
			*********************************

			forval nn=1/`grid' {
				* empirical CDF at grid point nn: share of the group at or below it
				qui gen `cdfind'=(`gen'<=`tempx'[`nn']) if `gen'!=. & `tempx'[`nn']!=. &  `refvar'==`ll1'
				qui sum `cdfind', meanonly
				qui replace `tempy'=r(mean) if _n==`nn'
				drop `cdfind'

				if `smoothoff'==0 {
					* smoothed CDF: group mean of the normal CDF kernel
					qui gen `cdfsm'=normal((-`gen'+`tempx'[`nn'])/`h_cdf') if `gen'!=. & `tempx'[`nn']!=. &  `refvar'==`ll1'
					qui sum `cdfsm', meanonly
					qui replace `tempy_smooth'=r(mean) if _n==`nn'
					drop `cdfsm'
				}
			}

			* keep one pair of CDF variables per level under level-specific
			* temporary names; the working names are reused in the next pass
			tempvar tempy_`ll1'
			rename `tempy' `tempy_`ll1''

			tempvar tempy_smooth_`ll1'
			rename `tempy_smooth' `tempy_smooth_`ll1''

		}

	}

	*****************************
	* IMPORT DATA FROM BASE RUN
	*****************************

	capture drop `stubnew'_*

	if `bootstrap'==1 {
		* load the Mata vectors directly into the stub-named variables that
		* the code below reads; identical to the plain names when the stub
		* is base
		getmata `stubnew'_group=base_group `stubnew'_gen=base_gen `stubnew'_sel=base_sel, replace force
	}
	if `bootstrap'==0 {
		* base run: the reference prediction is the constant 0
		qui gen `stubnew'_group=`refvar'
		qui gen `stubnew'_gen=0
		qui gen `stubnew'_sel=`refvar'!=.
	}

	*********************************
	* NOW MANUAL KS-TEST
	*********************************

	matrix define KS_DiDmax=J(`sizeJ',`sizeJ',.)
	matrix define KS_DiDabs=J(`sizeJ',`sizeJ',.)
	matrix define KS_DiDmaxrev=J(`sizeJ',`sizeJ',.)

	local col=1
	foreach ll1 of local levels {
		local row=1
		foreach ll2 of local levels {

			tempvar ks_group_d ks_sel_d base_group_d base_sel_d

			qui gen `ks_group_d'=(`refvar'==`ll1') if ((`refvar'==`ll1') | (`refvar'==`ll2'))
			qui gen `ks_sel_d'=((`refvar'==`ll1') | (`refvar'==`ll2')) & `gen'!=.

			qui gen `base_group_d'=(`stubnew'_group==`ll1') if ((`stubnew'_group==`ll1') | (`stubnew'_group==`ll2'))
			qui gen `base_sel_d'=((`stubnew'_group==`ll1') | (`stubnew'_group==`ll2')) & `stubnew'_gen!=.

			ks_manual_DiD, d(`ks_group_d') x(`gen') sel(`ks_sel_d') dbase(`base_group_d') xbase(`stubnew'_gen) selbase(`base_sel_d')
			local ks_DiD_abs=r(ks_DiD_abs)
			local ks_DiD_0lt1=r(ks_DiD_0lt1)
			local ks_DiD_1lt0=r(ks_DiD_1lt0)

			matrix KS_DiDmax[`row',`col']=`ks_DiD_0lt1'
			matrix KS_DiDabs[`row',`col']=`ks_DiD_abs'
			matrix KS_DiDmaxrev[`row',`col']=`ks_DiD_1lt0'

			* drop all four helpers so they do not pile up across pairs
			drop `ks_group_d' `ks_sel_d' `base_group_d' `base_sel_d'

			local row=`row'+1
		}
		local col=`col'+1
	}

	if `cdfoff'==0 {

		* Plot specification for the smoothed CDFs. No graph command in this
		* program consumes txt2 or legend_txt; they are only assembled here.
		local txt2=""
		local legend_txt=""
		local ll1_count=1
		foreach ll1 of local levels {
			local txt2 "`txt2' (line `tempy_smooth_`ll1'' `tempx' in 1/`grid', sort connect(J))"
			qui sum `gen' if `refvar'==`ll1'
			local nn=r(N)
			local legend_txt `"`legend_txt' label(`ll1_count' "`refvar'=`ll1' [N=`nn']")"'
			local ll1_count=`ll1_count'+1
		}

		*****************************
		* EXPORT DATA
		*****************************

		preserve
			qui gen x_cdf=`tempx'
			local txt3="x_cdf"
			foreach ll1 of local levels {
				qui gen y_cdf_`ll1'=`tempy_`ll1''
				qui gen y_cdf_smooth_`ll1'=`tempy_smooth_`ll1''
				local txt3="`txt3' y_cdf_`ll1' y_cdf_smooth_`ll1'"
			}
			keep `txt3'
			qui keep if _n<=`grid'
			di "Note: output saved as output_`filename'. $S_DATE, $S_TIME"
			* path is quoted so that directories containing spaces work
			save "${output}/output_`filename'", replace
		restore

	}

	*****************************
	* EXPORT STUBNEW
	*****************************

	capture drop `stubnew'_*

	if `bootstrap'==0 {
		* base run: leave the actual prediction behind under the stub names
		qui gen `stubnew'_group=`refvar'
		qui gen `stubnew'_gen = `gen'
		qui gen `stubnew'_sel=`refvar'!=. & `gen'!=.
	}

	*****************************
	* PREPARE RETURN VECTOR
	*****************************

	local home_unequal_KS=ksmirnov_stat[2,3]
	local equal_home_KS=ksmirnov_stat[1,2]
	local equal_unequal_KS=ksmirnov_stat[1,3]

	local home_unequal_KS_pval=ksmirnov_pval[2,3]
	local equal_home_KS_pval=ksmirnov_pval[1,2]
	local equal_unequal_KS_pval=ksmirnov_pval[1,3]

	local home_unequal_KS1s=ksmirnov_stat1s[2,3]
	local equal_home_KS1s=ksmirnov_stat1s[1,2]
	local equal_unequal_KS1s=ksmirnov_stat1s[1,3]

	local home_unequal_KS1srev=ksmirnov_stat1srev[2,3]
	local equal_home_KS1srev=ksmirnov_stat1srev[1,2]
	local equal_unequal_KS1srev=ksmirnov_stat1srev[1,3]

	local home_unequal_KS_pval1s=ksmirnov_pval1s[2,3]
	local equal_home_KS_pval1s=ksmirnov_pval1s[1,2]
	local equal_unequal_KS_pval1s=ksmirnov_pval1s[1,3]

	local home_unequal_KS_pval1srev=ksmirnov_pval1srev[2,3]
	local equal_home_KS_pval1srev=ksmirnov_pval1srev[1,2]
	local equal_unequal_KS_pval1srev=ksmirnov_pval1srev[1,3]

	local home_unequal_KS_DiDmax=KS_DiDmax[2,3]
	local equal_home_KS_DiDmax=KS_DiDmax[1,2]
	local equal_unequal_KS_DiDmax=KS_DiDmax[1,3]

	local home_unequal_KS_DiDmaxrev=KS_DiDmaxrev[2,3]
	local equal_home_KS_DiDmaxrev=KS_DiDmaxrev[1,2]
	local equal_unequal_KS_DiDmaxrev=KS_DiDmaxrev[1,3]

	local home_unequal_KS_DiDabs=KS_DiDabs[2,3]
	local equal_home_KS_DiDabs=KS_DiDabs[1,2]
	local equal_unequal_KS_DiDabs=KS_DiDabs[1,3]

	***************
	* PREPARE FOR OUTPUT
	***************

	* The order of values below must match the column names that follow.
	matrix define b=( `equal_home_KS',`home_unequal_KS',`equal_unequal_KS', ///
	`equal_home_KS1s',`home_unequal_KS1s',`equal_unequal_KS1s', ///
	`equal_home_KS1srev',`home_unequal_KS1srev',`equal_unequal_KS1srev', ///
	`equal_home_KS_pval',`home_unequal_KS_pval',`equal_unequal_KS_pval', ///
	`equal_home_KS_pval1s',`home_unequal_KS_pval1s',`equal_unequal_KS_pval1s', ///
	`equal_home_KS_pval1srev',`home_unequal_KS_pval1srev',`equal_unequal_KS_pval1srev', ///
	`home_unequal_KS_DiDabs',`equal_home_KS_DiDabs',`equal_unequal_KS_DiDabs', ///
	`home_unequal_KS_DiDmax',`equal_home_KS_DiDmax',`equal_unequal_KS_DiDmax', ///
	`home_unequal_KS_DiDmaxrev',`equal_home_KS_DiDmaxrev',`equal_unequal_KS_DiDmaxrev', ///
	`N_wagereg',`R2_wagereg',`N_pred',`N_probit')

	matrix colnames b="equal_home_KS" "home_unequal_KS" "equal_unequal_KS" ///
	"equal_home_KS1s" "home_unequal_KS1s" "equal_unequal_KS1s" ///
	"equal_home_KS1srev" "home_unequal_KS1srev" "equal_unequal_KS1srev" ///
	"equal_home_KS_pval" "home_unequal_KS_pval" "equal_unequal_KS_pval" ///
	"equal_home_KS_pval1s" "home_unequal_KS_pval1s" "equal_unequal_KS_pval1s" ///
	"equal_home_KS_pval1srev" "home_unequal_KS_pval1srev" "equal_unequal_KS_pval1srev" ///
	"home_unequal_KS_DiDabs" "equal_home_KS_DiDabs" "equal_unequal_KS_DiDabs" ///
	"home_unequal_KS_DiDmax" "equal_home_KS_DiDmax" "equal_unequal_KS_DiDmax" ///
	"home_unequal_KS_DiDmaxrev" "equal_home_KS_DiDmaxrev" "equal_unequal_KS_DiDmaxrev" ///
	"N_wagereg" "R2_wagereg" "N_pred" "N_probit"

	*****************************
	* PREPARE -ERETURN-
	*****************************

	* e(sample) marks the observations that received a prediction
	qui gen byte `esamp'=(`gen'!=.)
	ereturn post b, esample(`esamp')

	if `sleep'>0 {
		sleep `sleep'
	}

end

	*
